library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Download the global confirmed-case time series and reshape it from wide
# (one column per report date) to long (one row per location and date).
# The four location columns stay as identifiers; every other column name
# becomes a value in `Date` and its cell becomes `Confirmed`.
time_series_confirmed_long <-
  read_csv(
    url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
  ) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Let's get the time series data for deaths

# Download the global deaths time series and reshape it to long format:
# one row per location and date, with the count stored in `Deaths`.
time_series_deaths_long <-
  read_csv(
    url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
  ) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  ) %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Create Keys 

# Build one join key per row by pasting province, country and date together
# with "." as separator. The confirmed table keeps its source columns next to
# the key (remove = FALSE); the deaths table is cut down to the key and the
# count only, so joining it does not duplicate the location columns.
time_series_confirmed_long <- unite(
  time_series_confirmed_long,
  col = "Key", Province_State, Country_Region, Date,
  sep = ".", remove = FALSE
)
time_series_deaths_long <- time_series_deaths_long %>%
  unite(col = "Key", Province_State, Country_Region, Date, sep = ".") %>%
  select(Key, Deaths)

# Join tables

# Combine confirmed and death counts on the shared key. A full join keeps rows
# that appear in only one of the two tables; the helper key is dropped after.
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key)

# Reformat the data

# Parse the month/day/year strings (former column headers) into Date values
time_series_long_joined <- time_series_long_joined %>%
  mutate(Date = mdy(Date))

# Create Report table with counts

# Stack every non-identifier column (Confirmed, Deaths) into a
# Report_Type / Counts pair so both measures share one value column.
time_series_long_joined_counts <- pivot_longer(
  time_series_long_joined,
  cols = -c(Province_State, Country_Region, Lat, Long, Date),
  names_to = "Report_Type",
  values_to = "Counts"
)
# Plot graph to a pdf output file

# Write a plot of US deaths over time to a 6 x 3 inch PDF file.
#
# pdf() cannot create directories, so make sure the target folder exists.
dir.create("images", showWarnings = FALSE)
pdf("images/time_series_example_plot.pdf", width = 6, height = 3)
# print() is explicit so the plot is drawn on the PDF device even when this
# code runs via source() or inside a function, where ggplot objects are not
# auto-printed and the file would otherwise come out empty.
print(
  time_series_long_joined %>%
    # Filter first: only the US rows need to be summed.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at(); "drop_last" keeps the
    # result grouped by Country_Region, as summarise_at() did.
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
dev.off()
## png 
##   2
# Plot graph to a png output file

# Write a plot of US deaths over time to a 6 x 6 inch PNG file.
# `ppi` is the resolution in pixels per inch; png() takes its size in pixels,
# hence the 6 * ppi width and height.
ppi <- 300
# png() cannot create directories, so make sure the target folder exists.
dir.create("images", showWarnings = FALSE)
png("images/time_series_example_plot.png",
    width = 6 * ppi, height = 6 * ppi, res = ppi)
# print() is explicit so the plot is drawn on the PNG device even when this
# code runs via source() or inside a function, where ggplot objects are not
# auto-printed and the file would otherwise come out empty.
print(
  time_series_long_joined %>%
    # Filter first: only the US rows need to be summed.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at(); "drop_last" keeps the
    # result grouped by Country_Region, as summarise_at() did.
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
dev.off()
## png 
##   2
# This is the RMarkdown style for inserting images:

US COVID-19 Deaths

# This is an alternative way using html:

US COVID-19 Deaths

#Interactive Graphs

library("plotly")
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#There are two common formats used in graphing that you may come across in examples
#Version 1

# Version 1: build the ggplot inline and hand it straight to ggplotly(),
# which converts it into an interactive plotly widget.
ggplotly(
  time_series_long_joined %>%
    # Filter first: only the US rows need to be summed.
    filter(Country_Region == "US") %>%
    group_by(Country_Region, Date) %>%
    # across() replaces the superseded summarise_at(); "drop_last" keeps the
    # result grouped by Country_Region, as summarise_at() did.
    summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last") %>%
    ggplot(aes(x = Date, y = Deaths)) +
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
)
#Version 2

# Version 2: same plot as version 1, but the summarised data and the ggplot
# object are stored in named variables before being passed to ggplotly().
US_deaths <- time_series_long_joined %>%
  # Filter first: only the US rows need to be summed.
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  # across() replaces the superseded summarise_at(); "drop_last" keeps the
  # result grouped by Country_Region, as summarise_at() did.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last")
p <- ggplot(data = US_deaths, aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths")
ggplotly(p)

Animated Graphs with gganimate

library("gganimate")
library("transformr")
theme_set(theme_bw())
### An animation of the confirmed cases in select countries
# Per-country daily totals for the five countries shown in the animation.
data_time <- time_series_long_joined %>%
  # Filter first: only the selected countries need to be summed.
  filter(Country_Region %in% c("China", "Korea, South", "Japan", "Italy", "US")) %>%
  group_by(Country_Region, Date) %>%
  # across() replaces the superseded summarise_at(); "drop_last" keeps the
  # result grouped by Country_Region, as summarise_at() did.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last")

# One coloured line per country, revealed progressively along the Date axis.
p <- ggplot(data_time, aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Confirmed COVID-19 Cases") +
  # Giving every observation its own group makes gganimate treat each point
  # as a separate element during the reveal.
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)

# animate(p,renderer = gifski_renderer(), end_pause = 15)
# end_pause holds the final frame for 15 extra frames before the loop restarts.
animate(p, end_pause = 15)

Covid-19 Data: 6/13-9/13

# Confirmed cases per US state from the 06-13-2020 daily report: keep the US
# rows, then add up the rows belonging to each state. summarise() drops only
# the last grouping level, so the result stays grouped by Province_State.
Confirmed_State_6_13 <-
  read_csv(
    url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/06-13-2020.csv")
  ) %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State, Country_Region) %>%
  summarise(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
# Confirmed cases per US state from the 09-13-2020 daily report: keep the US
# rows, then add up the rows belonging to each state. summarise() drops only
# the last grouping level, so the result stays grouped by Province_State.
Confirmed_State_9_13 <-
  read_csv(
    url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-13-2020.csv")
  ) %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State, Country_Region) %>%
  summarise(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
# "Recovered" is not a state, so drop that row from BOTH snapshots before they
# are joined. Filtering only the 9/13 table would leave a "Recovered" row from
# 6/13 with no 9/13 partner, i.e. an NA after the full join.
# NOTE(review): that unmatched row is presumably what triggers the
# "Removed 1 rows containing missing values" warning in the later bar charts;
# confirm against the 06-13-2020 report.
Confirmed_State_6_13 <- Confirmed_State_6_13 %>%
  filter(Province_State != "Recovered")
Confirmed_State_9_13 <- Confirmed_State_9_13 %>%
  filter(Province_State != "Recovered")
# First look at the raw join of the two snapshots: columns that exist in both
# tables come back with .x (6/13) and .y (9/13) suffixes.
Confirmed_State_6_13_9_13_joined <- Confirmed_State_6_13 %>%
  full_join(Confirmed_State_9_13, by = "Province_State")
head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Alabama        US                     24601 US                    138755
## 2 Alaska         US                       653 US                      6268
## 3 Arizona        US                     34660 US                    208512
## 4 Arkansas       US                     12095 US                     70219
## 5 California     US                    150018 US                    761728
## 6 Colorado       US                     29002 US                     61293
# Redo the join, this time tidying the result: drop the duplicated country
# columns, give the two count columns date-based names, and treat a state
# that is missing from the 6/13 snapshot as having 0 confirmed cases.
Confirmed_State_6_13_9_13_joined <- Confirmed_State_6_13 %>%
  full_join(Confirmed_State_9_13, by = "Province_State") %>%
  select(-Country_Region.x, -Country_Region.y) %>%
  rename(
    Confirmed_6_13_2020 = Confirmed.x,
    Confirmed_9_13_2020 = Confirmed.y
  ) %>%
  replace_na(list(Confirmed_6_13_2020 = 0))

head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 3
## # Groups:   Province_State [6]
##   Province_State Confirmed_6_13_2020 Confirmed_9_13_2020
##   <chr>                        <dbl>               <dbl>
## 1 Alabama                      24601              138755
## 2 Alaska                         653                6268
## 3 Arizona                      34660              208512
## 4 Arkansas                     12095               70219
## 5 California                  150018              761728
## 6 Colorado                     29002               61293
# which(is.na(Confirmed_State_6_13_9_13_joined))

# Long format for plotting: one row per state and snapshot. `Date` holds the
# former column name (Confirmed_6_13_2020 / Confirmed_9_13_2020) and
# `Confirmed` holds the count.
Confirmed_State_6_13_9_13_joined_long <- pivot_longer(
  Confirmed_State_6_13_9_13_joined,
  cols = -c(Province_State),
  names_to = "Date",
  values_to = "Confirmed"
)

Challenge 1

#Print a graph (different from the one above) to a png file using 3*ppi for the height and width and display the png file in the report using the above R Markdown format.

# Challenge 1: bar chart of confirmed cases per state for the two snapshots,
# written to a 3 x 3 inch PNG (`ppi` pixels per inch, so 3 * ppi pixels a side).
ppi <- 300
png("us_confirmed_covid_cases_State_6_13_9_13.png",
    width = 3 * ppi, height = 3 * ppi, res = ppi)
# print() is explicit so the plot reaches the PNG device even when this code
# runs via source() or inside a function, where ggplot objects are not
# auto-printed.
print(
  ggplot(
    Confirmed_State_6_13_9_13_joined_long,
    # `fill` (not `color`) is what colours the bars; `color` only draws their
    # outlines.
    aes(x = Confirmed, y = Province_State, fill = Date)
  ) +
    # Bars are dodged, not stacked: stacking the two snapshot counts would
    # make each bar's length their sum, which is not a meaningful number of
    # cases. geom_col() is the shorthand for geom_bar(stat = "identity").
    geom_col(position = "dodge") +
    labs(
      title = "COVID-19 Confirmed Cases in US",
      x = "Number of Confirmed Cases",
      y = "State/Province in US"
    )
)
dev.off()
## png 
##   2

Challenge 2

#Turn one of the exercises from Lab 5 into an interactive graph with plotly

# Challenge 2: interactive version of the per-state bar chart.
ggplotly(
  ggplot(
    Confirmed_State_6_13_9_13_joined_long,
    # `fill` (not `color`) is what colours the bars; `color` only draws their
    # outlines.
    aes(x = Confirmed, y = Province_State, fill = Date)
  ) +
    # Bars are dodged, not stacked: stacking the two snapshot counts would
    # make each bar's length their sum, which is not a meaningful number of
    # cases. geom_col() is the shorthand for geom_bar(stat = "identity").
    geom_col(position = "dodge") +
    labs(
      title = "COVID-19 Confirmed Cases in US",
      x = "Number of Confirmed Cases",
      y = "State/Province in US"
    )
)
## Warning: Removed 1 rows containing missing values (position_stack).

Challenge 3

#Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.

# Challenge 3: animated confirmed-case curves for ten selected countries.
data_time <- time_series_long_joined %>%
  # Filter first: only the selected countries need to be summed.
  # The data set spells the country out as "United Kingdom"; filtering on "UK"
  # matches no rows and silently drops it from the plot.
  filter(Country_Region %in% c("US", "Brazil", "India", "Italy",
                               "Mexico", "Spain", "France", "Iran", "Peru",
                               "United Kingdom")) %>%
  group_by(Country_Region, Date) %>%
  # across() replaces the superseded summarise_at(); "drop_last" keeps the
  # result grouped by Country_Region, as summarise_at() did.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop_last")

p <- ggplot(data_time, aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_point() +
  geom_line() +
  # The y axis shows Confirmed, so the title must say so; the previous title
  # described the plot as deaths.
  # NOTE(review): if deaths were the intended measure, switch y to Deaths
  # instead and restore the old title.
  ggtitle("Confirmed COVID-19 Cases in the Top 10 Countries by Deaths") +
  # Giving every observation its own group makes gganimate treat each point
  # as a separate element during the reveal.
  geom_point(aes(group = seq_along(Date))) +
  transition_reveal(Date)

# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
# end_pause holds the final frame for 15 extra frames before the loop restarts.
animate(p, end_pause = 15)

Application written in R (R Core Team 2015) using the Shiny framework (Chang et al. 2015).

References

Chang, W., J. Cheng, JJ. Allaire, Y. Xie, and J. McPherson. 2015. “Shiny: Web Application Framework for R. R Package Version 0.12.1.” Computer Program. http://CRAN.R-project.org/package=shiny.

R Core Team. 2015. “R: A Language and Environment for Statistical Computing.” Journal Article. http://www.R-project.org.